import numpy as np
import pandas as pd
import os
# Extra strings that read_csv should treat as missing (NaN), on top of pandas' defaults.
missing_values = ["n/a","na","--"]
data = pd.read_csv('company_data.csv', na_values = missing_values)
# Print each column's name, non-null count and dtype.
data.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 141 entries, 0 to 140 Data columns (total 36 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 Company_name 140 non-null object 1 role 118 non-null object 2 year_spent 117 non-null float64 3 exp_year 118 non-null float64 4 salary_satisfiction 118 non-null float64 5 working_hour_satisfiction 119 non-null float64 6 team_collaboration 119 non-null float64 7 company_premises 118 non-null float64 8 management_attitude 114 non-null object 9 Sentiment 107 non-null object 10 tech_in_management 103 non-null object 11 good_onboarding_process 117 non-null float64 12 kind_of_company 131 non-null object 13 work_in_future 131 non-null object 14 employee_number(approx) 130 non-null object 15 hiring_and_firing_rate 130 non-null float64 16 experienced_developer_number (2-5 years) 129 non-null object 17 client_types 130 non-null object 18 used_technology 130 non-null object 19 numberof_management_people 130 non-null object 20 owner_of_product 81 non-null object 21 company_operation 82 non-null object 22 marketing_expenditure (monthly) 82 non-null object 23 projected_customer_number 50 non-null float64 24 estimated_valuation 45 non-null float64 25 poduct_uniqueness_reason 48 non-null object 26 services 100 non-null object 27 company_operation_technique 101 non-null object 28 mentor/advisor 101 non-null float64 29 produce_the_actual_product 101 non-null float64 30 any_systematic_method_followed 101 non-null object 31 stage_of_business 101 non-null object 32 having_minimum_viable_product 101 non-null object 33 have_testing_phase 101 non-null float64 34 number_of_testers 101 non-null object 35 keeping_promises 141 non-null int64 dtypes: float64(13), int64(1), object(22) memory usage: 39.8+ KB
# Keep a copy of the frame as loaded; `data` itself is modified by later cells,
# and later cells read the raw text columns back from `data1`.
data1 = data.copy()
data1
| Company_name | role | year_spent | exp_year | salary_satisfiction | working_hour_satisfiction | team_collaboration | company_premises | management_attitude | Sentiment | ... | services | company_operation_technique | mentor/advisor | produce_the_actual_product | any_systematic_method_followed | stage_of_business | having_minimum_viable_product | have_testing_phase | number_of_testers | keeping_promises | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | chaldal ltd. | Back end developer | 0.9 | 0.9 | 1.0 | 1.0 | 2.0 | 2.0 | Satisfactory | Good | ... | Groceries and daily need items | Revenue based | 1.0 | 0.0 | 1 | profit stage | 1 | 1.0 | 0 to 10 | 0 |
| 1 | lexoro gmbh | Back end developer | 1.0 | 4.0 | 1.0 | 1.0 | 2.0 | 2.0 | Oddly nice. | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 2 | hamid fabrics limited | Tester | 1.0 | 3.0 | 1.0 | 1.0 | 2.0 | 2.0 | Very nice | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 3 | samsung r&d | Front end developer | 1.2 | 4.0 | 1.0 | 1.0 | 2.0 | 2.0 | nice | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 4 | eta infotech ltd | Full Stack | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | Satisfactory | Good | ... | Banks and financial organizations | Revenue | 1.0 | 1.0 | 1 | profit stage | 1 | NaN | NaN | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 136 | pilu | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | SureCash is the fastest growing Fintech Compan... | Revenue Based | 0.0 | 1.0 | 1 | Growth Stage | 1 | 0.0 | 21 to 30 | 2 |
| 137 | provashi | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | Sindabad.com Ltd. is a business-to-business (B... | Angel Investor | 1.0 | 0.0 | 0 | Profit Stage | 1 | 0.0 | 0 to 10 | 2 |
| 138 | radassist | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | HandyMama is the pioneering web and mobile bas... | Venture Capitalist funding | 0.0 | 1.0 | 1 | Growth Stage | on process | 1.0 | 0 to 10 | 1 |
| 139 | scientiko.com | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | Obhai is a ride-sharing app enabling passenger... | Revenue Based | 0.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 11 to 20 | 1 |
| 140 | vobon | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | Bagdoom.com has been designed with splendid ex... | Angel Investor | 0.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 0 to 10 | 0 |
141 rows × 36 columns
# Cleaning the data.
# NOTE(review): `data` already comes from pd.read_csv, so re-wrapping it in
# pd.DataFrame looks redundant.
data = pd.DataFrame(data)
# Keep only the rows that have no missing value in any column; per the output
# displayed below this leaves 20 of the 141 rows.
cleaned_data = data.dropna()
cleaned_data
| Company_name | role | year_spent | exp_year | salary_satisfiction | working_hour_satisfiction | team_collaboration | company_premises | management_attitude | Sentiment | ... | services | company_operation_technique | mentor/advisor | produce_the_actual_product | any_systematic_method_followed | stage_of_business | having_minimum_viable_product | have_testing_phase | number_of_testers | keeping_promises | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 14 | optagen tech0logies limited | SQA | 3.0 | 3.0 | 1.0 | 1.0 | 2.0 | 2.0 | Very nice behavior and tehy expectation is ade... | Good | ... | Tech0logical products | Self Funding | 1.0 | 1.0 | 0 | Idea Stage | on process | 1.0 | 0 to 10 | 2 |
| 15 | in0va tech | Front end | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | Fiendly and realestic | Good | ... | webdev, android dev, e commerce, erp, sme | Self Funding | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 0 to 10 | 2 |
| 16 | misfit tech0logies ltd. | Full stack developer | 5.0 | 5.0 | 1.0 | 1.0 | 2.0 | 2.0 | Good but THE CTO is Not a technical person so ... | Moderate | ... | Artificial Intelligence, Intelligent Bot, ERP ... | Revenue Based | 1.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 21 to 30 | 2 |
| 21 | energx | Font end develope | 5.0 | 3.0 | 1.0 | 1.0 | 2.0 | 2.0 | understanding about employees need | Good | ... | With the background rooted in software develop... | Venture Capitalist funding | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 0 to 10 | 2 |
| 33 | in0vatech | Front end | 2.0 | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | Friendly | Good | ... | SaaS, Web based products, Mobile Application | Revenue Based | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 0 to 10 | 1 |
| 40 | alice labs | Front end | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | Cares about the employee | Good | ... | We have built a powerful tool that uses Artifi... | Self Funding | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 0 to 10 | 1 |
| 41 | loop freight | Front end | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 1.0 | Fine and caring | Good | ... | Loop is a tech0logy-enabled logistics company ... | Revenue Based | 0.0 | 0.0 | 0 | Growth Stage | on process | 1.0 | 0 to 10 | 0 |
| 43 | ants aerial systems | Back end developer | 2.0 | 2.0 | 0.0 | 1.0 | 2.0 | 2.0 | The CEO is very great person and ares a lot | Good | ... | We are working on developing an Unmanned Air S... | Angel Investor | 1.0 | 1.0 | 1 | Idea Stage | on process | 1.0 | 0 to 10 | 0 |
| 44 | 0dding heads games | Full stack developer | 2.0 | 1.0 | 0.0 | 1.0 | 2.0 | 2.0 | puts very pressure on the developers | Bad | ... | Games | Self Funding | 1.0 | 1.0 | 1 | Idea Stage | 1 | 1.0 | 0 to 10 | 1 |
| 67 | bagdoom.com | Full stack developer | 5.0 | 1.0 | 1.0 | 0.0 | 2.0 | 1.0 | High expenctation without much payoff | Bad | ... | Bagdoom.com has been designed with splendid ex... | Angel Investor | 0.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 0 to 10 | 2 |
| 68 | ajkerdeal.com | Back end develope | 1.0 | 2.0 | 0.0 | 1.0 | 2.0 | 1.0 | Too much polities, senior members are unqualified | Bad | ... | Ajkerdeal is an online retail marketplace in B... | Venture Capitalist funding | 1.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 11 to 20 | 1 |
| 69 | ifarmer | Full stack developer | 5.0 | 3.0 | 0.0 | 1.0 | 1.0 | 2.0 | over te top expectation | Bad | ... | iFarmer is an impact tech startup that has dev... | Self Funding | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 11 to 20 | 0 |
| 86 | parallax logic | Back end developer | 4.0 | 1.0 | 0.0 | 0.0 | 1.0 | 2.0 | moderatae | Moderate | ... | With the background rooted in software develop... | Venture Capitalist funding | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 0 to 10 | 1 |
| 92 | tigrow | Front end | 2.0 | 2.0 | 0.0 | 0.0 | 2.0 | 1.0 | puts a lot of pressure but very low payoff | Bad | ... | We have launched our Team Collaboration Tool w... | Venture Capitalist funding | 1.0 | 0.0 | 1 | Growth Stage | on process | 1.0 | 0 to 10 | 1 |
| 94 | hastech | Full stack developer | 2.0 | 2.0 | 0.0 | 0.0 | 1.0 | 2.0 | High expectation with low payoff | Bad | ... | HasTech- Website and Mobile App Template Marke... | Angel Investor | 1.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 11 to 20 | 0 |
| 108 | sindabad.com | Front end | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | Very bad | Bad | ... | Sindabad.com Ltd. is a business-to-business (B... | Angel Investor | 1.0 | 0.0 | 0 | Profit Stage | 1 | 0.0 | 0 to 10 | 0 |
| 109 | tappocket | Front end | 2.0 | 2.0 | 0.0 | 0.0 | 1.0 | 1.0 | High expectation with low payoff | Bad | ... | Tappocket App-based game development studio Dh... | Revenue Based | 0.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 21 to 30 | 1 |
| 110 | tech0haven company ltd. | Back end develope | 2.0 | 2.0 | 0.0 | 0.0 | 1.0 | 1.0 | very poor management | Bad | ... | In the last three decades Tech0haven has compl... | Venture Capitalist funding | 1.0 | 1.0 | 1 | Growth Stage | 1 | 1.0 | 11 to 20 | 2 |
| 115 | i0vace tech0logies | Back end developer | 6.0 | 3.0 | 0.0 | 0.0 | 1.0 | 1.0 | Too picky and un copeative | Bad | ... | Full stack food delivery system using cloud ki... | Venture Capitalist funding | 1.0 | 0.0 | 1 | Growth Stage | 1 | 1.0 | 0 to 10 | 2 |
| 116 | wastech bangladesh | Full stack development | 1.0 | 1.0 | 0.0 | 0.0 | 1.0 | 1.0 | Not very kind and caring | Bad | ... | Tech0logical products | Self Funding | 1.0 | 1.0 | 0 | Idea Stage | on process | 1.0 | 0 to 10 | 1 |
20 rows × 36 columns
# Missing answers become empty strings rather than the literal text "nan";
# astype(str) alone would turn NaN into the word "nan", which would then be
# counted as a real term in the word-frequency analysis.
data['management_attitude'] = data['management_attitude'].fillna('').astype(str)
import nltk
import re
import string
from string import punctuation
from nltk.corpus import stopwords
def clean_text(text):
    """Normalise a piece of free text for word-level analysis.

    The steps are applied in this order: lowercase the text, remove anything
    in square brackets, remove http(s)/www links, remove ``<...>`` tags,
    remove punctuation, remove newlines, and remove every word that contains
    a digit.

    Args:
        text: The string to clean.

    Returns:
        The cleaned string. Removed pieces are deleted rather than replaced
        by a space, so the whitespace that surrounded them is left as is.
    """
    text = text.lower()
    # Raw strings keep the regex escapes (\[, \S, \w, \d) from being read as
    # (invalid) Python string escapes.
    text = re.sub(r'\[.*?\]', '', text)
    text = re.sub(r'https?://\S+|www\.\S+', '', text)
    text = re.sub(r'<.*?>+', '', text)
    text = re.sub('[%s]' % re.escape(string.punctuation), '', text)
    text = re.sub(r'\n', '', text)
    text = re.sub(r'\w*\d\w*', '', text)
    return text
def punctuation_stopwords_removal(git_text):
    """Split text into lowercase words, dropping punctuation and stopwords.

    Args:
        git_text: The string to tokenise.

    Returns:
        A list with the lowercased, whitespace-separated words of
        ``git_text`` (after its punctuation characters are removed) that are
        not in NLTK's English stopword list.
    """
    # Load the stopword list once and keep it as a set; looking it up again
    # for every word repeats the same work and scans a list each time.
    english_stopwords = set(stopwords.words('english'))
    # Drop punctuation character by character, then split on whitespace.
    without_punctuation = "".join(ch for ch in git_text if ch not in punctuation)
    lowered_words = (word.lower() for word in without_punctuation.split())
    return [word for word in lowered_words if word not in english_stopwords]
# Normalise every comment first, then turn it into a list of non-stopword words.
cleaned_attitude = data['management_attitude'].apply(clean_text)
data['management_attitude'] = cleaned_attitude.apply(punctuation_stopwords_removal)
data.head()
| Company_name | role | year_spent | exp_year | salary_satisfiction | working_hour_satisfiction | team_collaboration | company_premises | management_attitude | Sentiment | ... | services | company_operation_technique | mentor/advisor | produce_the_actual_product | any_systematic_method_followed | stage_of_business | having_minimum_viable_product | have_testing_phase | number_of_testers | keeping_promises | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | chaldal ltd. | Back end developer | 0.9 | 0.9 | 1.0 | 1.0 | 2.0 | 2.0 | [satisfactory] | Good | ... | Groceries and daily need items | Revenue based | 1.0 | 0.0 | 1 | profit stage | 1 | 1.0 | 0 to 10 | 0 |
| 1 | lexoro gmbh | Back end developer | 1.0 | 4.0 | 1.0 | 1.0 | 2.0 | 2.0 | [oddly, nice] | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 2 | hamid fabrics limited | Tester | 1.0 | 3.0 | 1.0 | 1.0 | 2.0 | 2.0 | [nice] | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 3 | samsung r&d | Front end developer | 1.2 | 4.0 | 1.0 | 1.0 | 2.0 | 2.0 | [nice] | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 4 | eta infotech ltd | Full Stack | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | [satisfactory] | Good | ... | Banks and financial organizations | Revenue | 1.0 | 1.0 | 1 | profit stage | 1 | NaN | NaN | 1 |
5 rows × 36 columns
# Two-column selection (tokenised comments + company name) that feeds the
# word-frequency plot.
df = data[['management_attitude','Company_name']]
# NOTE(review): `df` is already a DataFrame, so this wrap looks redundant.
sentiment_data = pd.DataFrame(df)
import plotly.express as px
from collections import Counter
def plot_most_common_terms(df):
    """Show a chart of the 20 most frequent words in ``management_attitude``.

    Args:
        df: DataFrame whose ``management_attitude`` cells are iterables of
            words (one iterable per row).
    """
    # Flatten the per-row word lists straight into the counter.
    word_counts = Counter(
        word for words in df['management_attitude'] for word in words
    )
    top_words = pd.DataFrame(word_counts.most_common(20), columns=['word', 'count'])
    fig = px.histogram(
        top_words,
        x='word',
        y='count',
        title='Most common terms used in management attitude',
        color_discrete_sequence=['#D8E46B'],
    )
    fig.show()
plot_most_common_terms(sentiment_data)
# Take the raw comment text and its label from the untouched copy `data1`,
# not from `data`, whose management_attitude column now holds token lists.
sentiment_data_final = data1[['management_attitude','Sentiment']]
data1
| Company_name | role | year_spent | exp_year | salary_satisfiction | working_hour_satisfiction | team_collaboration | company_premises | management_attitude | Sentiment | ... | services | company_operation_technique | mentor/advisor | produce_the_actual_product | any_systematic_method_followed | stage_of_business | having_minimum_viable_product | have_testing_phase | number_of_testers | keeping_promises | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | chaldal ltd. | Back end developer | 0.9 | 0.9 | 1.0 | 1.0 | 2.0 | 2.0 | Satisfactory | Good | ... | Groceries and daily need items | Revenue based | 1.0 | 0.0 | 1 | profit stage | 1 | 1.0 | 0 to 10 | 0 |
| 1 | lexoro gmbh | Back end developer | 1.0 | 4.0 | 1.0 | 1.0 | 2.0 | 2.0 | Oddly nice. | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 2 | hamid fabrics limited | Tester | 1.0 | 3.0 | 1.0 | 1.0 | 2.0 | 2.0 | Very nice | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 2 |
| 3 | samsung r&d | Front end developer | 1.2 | 4.0 | 1.0 | 1.0 | 2.0 | 2.0 | nice | Good | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | 0 |
| 4 | eta infotech ltd | Full Stack | 1.0 | 1.0 | 1.0 | 1.0 | 2.0 | 2.0 | Satisfactory | Good | ... | Banks and financial organizations | Revenue | 1.0 | 1.0 | 1 | profit stage | 1 | NaN | NaN | 1 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 136 | pilu | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | SureCash is the fastest growing Fintech Compan... | Revenue Based | 0.0 | 1.0 | 1 | Growth Stage | 1 | 0.0 | 21 to 30 | 2 |
| 137 | provashi | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | Sindabad.com Ltd. is a business-to-business (B... | Angel Investor | 1.0 | 0.0 | 0 | Profit Stage | 1 | 0.0 | 0 to 10 | 2 |
| 138 | radassist | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | HandyMama is the pioneering web and mobile bas... | Venture Capitalist funding | 0.0 | 1.0 | 1 | Growth Stage | on process | 1.0 | 0 to 10 | 1 |
| 139 | scientiko.com | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | Obhai is a ride-sharing app enabling passenger... | Revenue Based | 0.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 11 to 20 | 1 |
| 140 | vobon | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | Bagdoom.com has been designed with splendid ex... | Angel Investor | 0.0 | 1.0 | 1 | Profit Stage | 1 | 1.0 | 0 to 10 | 0 |
141 rows × 36 columns
# Two-column frame for the sentiment task: cleaned comment text plus its label.
# Both columns are cast to str first, so missing values show up as 'nan'.
raw_attitude = sentiment_data_final['management_attitude'].astype(str)
raw_sentiment = sentiment_data_final['Sentiment'].astype(str)
df_sentiment = pd.DataFrame({
    'attitude': raw_attitude.apply(clean_text),
    'sentiment': raw_sentiment.apply(lambda label: label.replace("rt", "")),
})
df_sentiment.head()
| attitude | sentiment | |
|---|---|---|
| 0 | satisfactory | Good |
| 1 | oddly nice | Good |
| 2 | very nice | Good |
| 3 | nice | Good |
| 4 | satisfactory | Good |
def encode_labels(df):
    """Replace the textual sentiment labels with integer codes.

    ``'Bad'`` becomes 0, ``'Good'`` becomes 1 and ``'Moderate'`` becomes 2.
    Any other value is left unchanged.

    Args:
        df: DataFrame with a ``sentiment`` column. It is modified in place.

    Returns:
        The same ``df`` object, with its ``sentiment`` column encoded.
    """
    label_codes = {'Bad': 0, 'Good': 1, 'Moderate': 2}
    # Map the whole column of the frame that was passed in. Writing into the
    # rows yielded by iterrows() is not guaranteed to reach the frame, and
    # the argument (not a global) is what the caller asked to encode.
    df['sentiment'] = df['sentiment'].map(
        lambda label: label_codes.get(label, label)
    )
    return df
# Encode the labels; per the output below, rows without a label keep the
# string 'nan'.
df_sentiment = encode_labels(df_sentiment)
df_sentiment
| attitude | sentiment | |
|---|---|---|
| 0 | satisfactory | 1 |
| 1 | oddly nice | 1 |
| 2 | very nice | 1 |
| 3 | nice | 1 |
| 4 | satisfactory | 1 |
| ... | ... | ... |
| 136 | nan | nan |
| 137 | nan | nan |
| 138 | nan | nan |
| 139 | nan | nan |
| 140 | nan | nan |
141 rows × 2 columns
# NOTE(review): none of these names are columns of the frame printed by
# data.info() above; the list is kept only in case later code refers to it.
to_drop = ['StandardHours', 'Over18', 'EmployeeCount', 'EmployeeNumber']

# Feature groups used for the encoding and correlation steps. Earlier
# assignments to these two names were removed: they were overwritten here
# before any use and listed columns this dataset does not contain.
# NOTE(review): data.info() reports `tech_in_management` as object dtype, so
# it may not actually be usable as a numerical feature - confirm.
numerical_features = ['year_spent', 'exp_year', 'tech_in_management']
categorical_features = [
    'salary_satisfiction', 'working_hour_satisfiction', 'team_collaboration',
    'company_premises', 'Sentiment', 'good_onboarding_process',
    'kind_of_company', 'hiring_and_firing_rate',
    'experienced_developer_number (2-5 years)', 'client_types',
    'marketing_expenditure (monthly)', 'company_operation_technique',
    'produce_the_actual_product', 'any_systematic_method_followed',
    'stage_of_business', 'having_minimum_viable_product',
    'have_testing_phase', 'keeping_promises',
]
#label encoding for categorical features
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
import joblib
# Work on a copy so the raw frame `data1` keeps its original values.
df = data1.copy()
path = "/Users/tanvir/Documents/Dataset"
# create the directory for the label encoding models once, before the loop
os.makedirs(os.path.join(path, "TextEncoding"), exist_ok=True)
for feature in categorical_features:
    le = LabelEncoder()
    # perform label encoding
    le.fit(df[feature])
    # save the encoder; the with-block closes the file handle afterwards
    with open(os.path.join(path, "TextEncoding/le_{}.sav".format(feature)), 'wb') as encoder_file:
        joblib.dump(le, encoder_file)
    # transform training data
    df[feature] = le.transform(df[feature])
    # get classes & remove first column to avoid the dummy variable trap
    columns = [feature + ' ' + str(label) for label in le.classes_][1:]
    # save classes
    with open(os.path.join(path, "TextEncoding/le_{}_classes.sav".format(feature)), 'wb') as classes_file:
        joblib.dump(columns, classes_file)
import matplotlib.pyplot as plt
import seaborn as sns
# Correlation between the numerical features.
# Select numeric dtypes explicitly: `tech_in_management` is stored as object,
# and pandas 2.x no longer drops non-numeric columns in corr() by default.
numeric_corr = data1[numerical_features].select_dtypes(include='number').corr()
plt.figure(figsize=(15, 15))
sns.heatmap(numeric_corr.round(2), annot=True, cmap='GnBu')
corr_mat = numeric_corr
plt.show()

# Bivariate analysis: correlation plot that includes the label-encoded
# categorical variables from `df`.
plt.figure(figsize=(20, 20))
encoded_numeric = df[categorical_features + numerical_features].select_dtypes(include='number')
sns.heatmap(encoded_numeric.corr(method='spearman').round(2), annot=True, cmap='GnBu')
# NOTE(review): the heatmap above shows Spearman correlation, while corr_mat
# (used for the list of correlated pairs) uses the default Pearson method -
# confirm which one is intended.
corr_mat = df[numerical_features + categorical_features].select_dtypes(include='number').corr()
plt.show()
# Correlated features: list every distinct pair of features exactly once.
# Only the strict upper triangle of the matrix is kept, so self-correlations
# and mirrored (b, a) entries are excluded by position. Dropping duplicate
# *values* instead would also discard different pairs that happen to share
# the same correlation, and would keep one self-correlation of 1.0.
upper_only = np.triu(np.ones(corr_mat.shape, dtype=bool), k=1)
s = corr_mat.where(upper_only).unstack().dropna()
so = s.sort_values(kind = "quicksort")
res1 = so[so>=0.5]
print(res1)
team_collaboration Sentiment 0.503132
working_hour_satisfiction Sentiment 0.504386
Sentiment salary_satisfiction 0.505844
kind_of_company marketing_expenditure (monthly) 0.508831
any_systematic_method_followed 0.529112
Sentiment company_premises 0.537412
good_onboarding_process team_collaboration 0.552040
company_premises working_hour_satisfiction 0.575587
team_collaboration 0.589055
exp_year year_spent 0.611114
company_premises good_onboarding_process 0.623367
salary_satisfiction working_hour_satisfiction 0.665733
produce_the_actual_product stage_of_business 0.684667
good_onboarding_process working_hour_satisfiction 0.686705
company_premises salary_satisfiction 0.691543
team_collaboration salary_satisfiction 0.695224
produce_the_actual_product having_minimum_viable_product 0.700637
stage_of_business company_operation_technique 0.703750
working_hour_satisfiction team_collaboration 0.709646
stage_of_business having_minimum_viable_product 0.719935
have_testing_phase having_minimum_viable_product 0.736178
stage_of_business have_testing_phase 0.742638
having_minimum_viable_product company_operation_technique 0.743653
good_onboarding_process salary_satisfiction 0.744477
company_operation_technique produce_the_actual_product 0.756428
have_testing_phase 0.775159
any_systematic_method_followed having_minimum_viable_product 0.818174
have_testing_phase any_systematic_method_followed 0.819522
company_operation_technique any_systematic_method_followed 0.826170
stage_of_business any_systematic_method_followed 0.828700
produce_the_actual_product any_systematic_method_followed 0.840902
have_testing_phase 0.851373
year_spent year_spent 1.000000
dtype: float64
def plot_category(feature, figsize=None):
    """Plot, per value of ``feature``, the percentage of rows with Attrition 'Yes' and 'No'.

    Reads the module-level DataFrame ``df``. For every value of ``feature``
    two bars are drawn: the percentage of that value's rows whose
    ``Attrition`` is ``'Yes'`` and the percentage whose ``Attrition`` is
    ``'No'``.

    NOTE(review): this relies on an ``Attrition`` column with 'Yes'/'No'
    values; the data.info() listing earlier in this file does not show such
    a column - confirm which frame this function is meant for.

    Args:
        feature: Name of the column of ``df`` to group by.
        figsize: Optional figure size passed to ``plt.subplots``; the
            matplotlib default is used when it is falsy.
    """
    # Rows per category, computed once and used as the common denominator.
    totals = df.groupby([feature]).size()
    # Align both counts to the full category list. A category missing from
    # one of the groups would otherwise produce NaN heights or a length
    # mismatch between bar positions and bar heights.
    yes_count = (
        df[df['Attrition'] == 'Yes'].groupby([feature]).size()
        .reindex(totals.index, fill_value=0)
    )
    no_count = (
        df[df['Attrition'] == 'No'].groupby([feature]).size()
        .reindex(totals.index, fill_value=0)
    )

    labels = totals.index
    x = np.arange(len(labels))  # the label locations
    width = 0.35  # the width of the bars

    if figsize:
        fig, ax = plt.subplots(figsize=figsize)
    else:
        fig, ax = plt.subplots()

    rects1 = ax.bar(x - width / 2, round(yes_count * 100 / totals, 2),
                    width, label='Yes')
    rects2 = ax.bar(x + width / 2, round(no_count * 100 / totals, 2),
                    width, label='No')

    # NOTE(review): the plotted values are percentages, although the axis
    # label says 'Count'.
    ax.set_ylabel('Count')
    ax.set_title('Based on %s'%feature)
    ax.set_xticks(x)
    ax.set_xticklabels(labels, rotation=80)
    ax.legend()
    ax.bar_label(rects1, padding=1)
    ax.bar_label(rects2, padding=1)
    fig.tight_layout()
    plt.show()